import pandas as pd
import os
import sys
import time
import numpy as np
import datetime
from clickhouse_driver import Client

#导入数据库
def batch_process(path):
    """Load a daily stock-trade CSV and bulk-insert it into ClickHouse.

    Reads the CSV at ``path`` with an explicit dtype for each listed
    column, prints the first rows, and sends every row to the
    ``ods_stock_trade_day_df`` table with a single ``INSERT``.

    Rows are sent positionally (the INSERT names no columns), so the CSV
    column order has to match the table's column order.

    The client is disconnected on exit, including when reading the CSV
    or the insert raises.

    Args:
        path: Path of the UTF-8 encoded CSV file to import.
    """
    # NOTE(review): connection settings (including the password) are
    # hard-coded; consider moving them to configuration.
    client = Client(host="192.168.50.40", user="default", password="9defbcg", port="9090", database="ods")
    try:
        print("start batch process trade day data...")

        # The dtypes must be given explicitly; otherwise pandas infers int
        # for some columns, which causes errors downstream.
        column_types = {
            '采集日期': str, '交易日期': str, '股票代码': str, '股票名称': str,
            '最新价': float, '涨跌幅': float, '涨跌额': float,
            '成交量(手)': np.int64, '成交额': np.float64, '振幅': float,
            '换手率': float, '市盈率(当日动态)': float,
            '量比': float, '最高': float, '最低': float,
            '今开': float, '昨收': float, '总市值': np.float64,
            '流通市值': np.float64, '市净率': float, '市盈率TTM': float,
        }
        # index_col=False: do not use the first column as the index,
        # otherwise the columns end up misaligned.
        df = pd.read_csv(path, dtype=column_types, encoding="UTF-8", index_col=False)

        print(df.head())

        client.execute('INSERT INTO ods_stock_trade_day_df VALUES', df.values.tolist())
    finally:
        # Always release the connection, even if the read or insert failed.
        client.disconnect()
    print("batch process stock trade day data finished!")

# 必须有main方法，调度才能执行
def main():
    """Script entry point: import the CSV named by the first CLI argument.

    Raises:
        IndexError: If no command-line argument was supplied.
    """
    print("etl_day start....")
    # Take the CSV location from the command line, then bulk-load it.
    csv_path = sys.argv[1]
    batch_process(csv_path)


# Run the import only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()



